# Replication file
# EVS noise models in "Fixed Effects and Post-Treatment Bias in Legacy Studies"

rm(list=ls())
set.seed(12435)
library(geosphere); library(geostatsp); library(mvnfast); library(mvtnorm)

# load data
evs <- read.csv("evs_noise.csv")

# calculate distances based on long and lat in data frame
evs_distances <- distm(evs[,c('lon','lat')], fun=distGeo)

# rescale distances from m to km
evs_distances <- evs_distances/1000

# # # FIRST WITH 3 DEGREES
matern_covar <- matern(evs_distances, param = c(range=3, shape=1, variance=1))

# generate noisy distance using matern covariance
set.seed(2)
noisy_dist <- rmvnorm(n = 1000, mean = rep(0, nrow(matern_covar)), sigma = matern_covar)
noisy_dist <- t(noisy_dist)

# G-ESTIMATOR
pvals1g <- pvals2g <- pvals3g <- NULL; 
for (i in 1:1000){
  evs$distn<- noisy_dist[,i]
  mod1f <- lm(intolerance~
                distn+prop_jewish25+unemployment33+population25+nazishare33+
                lr+immigrants07+unemployment07+unemp+educ+female+age+urban_scale,evs)
  mod1s<-lm(I(intolerance - 
                coef(mod1f)['immigrants07']*immigrants07 - coef(mod1f)['lr']*lr - coef(mod1f)['unemp']*unemp -  
                coef(mod1f)['unemployment07']*unemployment07 - coef(mod1f)['educ']*educ - coef(mod1f)['urban_scale']*urban_scale) ~ 
              distn+prop_jewish25+unemployment33+population25+nazishare33, evs)
  pvals1g <- c(pvals1g, summary(mod1s)$coefficients[2,4])
  
  mod2f <- lm(resentment~
                distn+prop_jewish25+unemployment33+population25+nazishare33+
                lr+immigrants07+unemployment07+unemp+educ+female+age+urban_scale,evs)
  mod2s<-lm(I(resentment - 
                coef(mod2f)['immigrants07']*immigrants07 - coef(mod2f)['lr']*lr - coef(mod2f)['unemp']*unemp -  
                coef(mod2f)['unemployment07']*unemployment07 - coef(mod2f)['educ']*educ - coef(mod2f)['urban_scale']*urban_scale) ~ 
              distn+prop_jewish25+unemployment33+population25+nazishare33, evs)
  pvals2g <- c(pvals2g, summary(mod2s)$coefficients[2,4])
  
  mod3f <- lm(far_right~
                distn+prop_jewish25+unemployment33+population25+nazishare33+
                lr+immigrants07+unemployment07+unemp+educ+female+age+urban_scale,evs)
  mod3s<-lm(I(far_right - 
                coef(mod3f)['immigrants07']*immigrants07 - coef(mod3f)['lr']*lr - coef(mod3f)['unemp']*unemp -  
                coef(mod3f)['unemployment07']*unemployment07 - coef(mod3f)['educ']*educ - coef(mod3f)['urban_scale']*urban_scale) ~ 
              distn+prop_jewish25+unemployment33+population25+nazishare33, evs)
  pvals3g <- c(pvals3g, summary(mod3s)$coefficients[2,4])
}

# get original pvalues
original_pv1 <- 3.67e-06
original_pv2 <- 4.55e-10
original_pv3 <- 0.00328


### 
### FIGURE SM6.3
###
pdf("noise_evs_mod1_3deg.pdf", height=8, width=8)
hist(pvals1g, breaks=100, main="", xlab="p-value")
abline(v=original_pv1, col="firebrick", lwd=2)
dev.off()
pdf("noise_evs_mod2_3deg.pdf", height=8, width=8)
hist(pvals2g, breaks=100, main="", xlab="p-value")
abline(v=original_pv2, col="firebrick", lwd=2)
dev.off()
pdf("noise_evs_mod3_3deg.pdf", height=8, width=8)
hist(pvals3g, breaks=100, main="", xlab="p-value")
abline(v=original_pv3, col="firebrick", lwd=2)
dev.off()


###
### TABLE SM6.1, part 3
###
table(pvals1g<original_pv1)/1000
table(pvals2g<original_pv2)/1000
table(pvals3g<original_pv3)/1000


# # SAME WITH 5 DEGREES
matern_covar <- matern(evs_distances, param = c(range=5, shape=1, variance=1))

set.seed(4532)
noisy_dist <- rmvnorm(n = 1000, mean = rep(0, nrow(matern_covar)), sigma = matern_covar)
noisy_dist <- t(noisy_dist)

# G-ESTIMATOR
pvals1g <- pvals2g <- pvals3g <- NULL; 
for (i in 1:1000){
  evs$distn<- noisy_dist[,i]
  mod1f <- lm(intolerance~
                distn+prop_jewish25+unemployment33+population25+nazishare33+
                lr+immigrants07+unemployment07+unemp+educ+female+age+urban_scale,evs)
  mod1s<-lm(I(intolerance - 
                coef(mod1f)['immigrants07']*immigrants07 - coef(mod1f)['lr']*lr - coef(mod1f)['unemp']*unemp -  
                coef(mod1f)['unemployment07']*unemployment07 - coef(mod1f)['educ']*educ - coef(mod1f)['urban_scale']*urban_scale) ~ 
              distn+prop_jewish25+unemployment33+population25+nazishare33, evs)
  pvals1g <- c(pvals1g, summary(mod1s)$coefficients[2,4])
  
  mod2f <- lm(resentment~
                distn+prop_jewish25+unemployment33+population25+nazishare33+
                lr+immigrants07+unemployment07+unemp+educ+female+age+urban_scale,evs)
  mod2s<-lm(I(resentment - 
                coef(mod2f)['immigrants07']*immigrants07 - coef(mod2f)['lr']*lr - coef(mod2f)['unemp']*unemp -  
                coef(mod2f)['unemployment07']*unemployment07 - coef(mod2f)['educ']*educ - coef(mod2f)['urban_scale']*urban_scale) ~ 
              distn+prop_jewish25+unemployment33+population25+nazishare33, evs)
  pvals2g <- c(pvals2g, summary(mod2s)$coefficients[2,4])
  
  mod3f <- lm(far_right~
                distn+prop_jewish25+unemployment33+population25+nazishare33+
                lr+immigrants07+unemployment07+unemp+educ+female+age+urban_scale,evs)
  mod3s<-lm(I(far_right - 
                coef(mod3f)['immigrants07']*immigrants07 - coef(mod3f)['lr']*lr - coef(mod3f)['unemp']*unemp -  
                coef(mod3f)['unemployment07']*unemployment07 - coef(mod3f)['educ']*educ - coef(mod3f)['urban_scale']*urban_scale) ~ 
              distn+prop_jewish25+unemployment33+population25+nazishare33, evs)
  pvals3g <- c(pvals3g, summary(mod3s)$coefficients[2,4])
}


### 
### FIGURE SM6.4
###
pdf("noise_evs_mod1_5deg.pdf", height=8, width=8)
hist(pvals1g, breaks=100, main="", xlab="p-value")
abline(v=original_pv1, col="firebrick", lwd=2)
dev.off()
pdf("noise_evs_mod2_5deg.pdf", height=8, width=8)
hist(pvals2g, breaks=100, main="", xlab="p-value")
abline(v=original_pv2, col="firebrick", lwd=2)
dev.off()
pdf("noise_evs_mod3_5deg.pdf", height=8, width=8)
hist(pvals3g, breaks=100, main="", xlab="p-value")
abline(v=original_pv3, col="firebrick", lwd=2)
dev.off()


###
### TABLE SM6.1, part 4
###
table(pvals1g<original_pv1)/1000
table(pvals2g<original_pv2)/1000
table(pvals3g<original_pv3)/1000

### 3 degrees
# Mod1: 5.1%
# Mod2: 0.4%
# Mod3: 1.1%

### 5 degrees
# Mod1: 4.5%
# Mod2: 0.9%
# Mod3: 1.1%
